import os
import csv
import locale
from numpy import *
from scipy.interpolate import interp1d
from pylab import *

from matplotlib.ticker import NullFormatter


# Root folder of the exported CSV files; read_ladder() and read_data() look
# for each file under <directory>/<dataset>/.
directory = "/osc-fs_home/mdehoon/Data/CASPARs/HiSeq/Bioanalyzer/"

# read_ladder() parses the "Size [bp]" column with locale.atoi(), so the
# process locale decides which number formats are accepted.
# NOTE(review): presumably en_US is chosen because sizes are exported with
# "," thousands separators -- confirm against the CSV files.
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

def read_ladder(dataset, base_dir=None):
    """Return an interpolator from migration time to fragment size.

    Opens ``<base_dir>/<dataset>/<dataset>_Results.csv``, skips ahead to the
    ``Sample Name,Ladder`` row and then to the next ``Peak Table`` row,
    checks the column header that follows, and collects ``Size [bp]``
    (column 0) and ``Aligned Migration Time [s]`` (column 5) from every row
    until a row with at most one cell is reached.

    Parameters
    ----------
    dataset : str
        Name of the run; used both as sub-directory and as file name prefix.
    base_dir : str, optional
        Folder containing the dataset sub-directory. Defaults to the
        module-level ``directory``.

    Returns
    -------
    scipy.interpolate.interp1d
        Linear interpolator mapping aligned migration time [s] to size [bp];
        it extrapolates outside the range covered by the ladder peaks.

    Raises
    ------
    ValueError
        If the peak table header differs from the expected one, or if no
        ladder peak could be read from the file.
    """
    if base_dir is None:
        base_dir = directory
    filename = "%s_Results.csv" % dataset
    path = os.path.join(base_dir, dataset, filename)
    print("Reading", path)
    # "\u00b5" is the micro sign (U+00B5); it is written as an escape so the
    # comparison does not depend on how this source file is encoded.
    expected_header = ['Size [bp]', 'Conc. [pg/\u00b5l]', 'Molarity [pmol/l]',
                       'Observations', 'Area', 'Aligned Migration Time [s]',
                       'Peak Height', 'Peak Width', '% of Total',
                       'Time corrected area']
    sizes = []
    times = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, encoding='latin1', newline='') as stream:
        sample_found = False
        table_found = False
        table_started = False
        for row in csv.reader(stream):
            if row == ['Sample Name', 'Ladder']:
                sample_found = True
            elif not sample_found:
                continue
            elif row == ['Peak Table']:
                table_found = True
            elif not table_found:
                continue
            elif not table_started:
                # Explicit check instead of assert, so that it also runs
                # under "python -O".
                if row != expected_header:
                    raise ValueError("Unexpected peak table header in %s: %r"
                                     % (path, row))
                table_started = True
            elif len(row) <= 1:
                # A blank line or a single-cell row marks the end of the table.
                break
            else:
                # Sizes are parsed with the current locale's number format.
                sizes.append(locale.atoi(row[0]))
                times.append(float(row[5]))
    if not sizes:
        raise ValueError("No ladder peak table found in %s" % path)
    times = array(times)
    sizes = array(sizes)
    return interp1d(times, sizes, kind='linear', fill_value='extrapolate')

def read_data(dataset, sample, name, base_dir=None):
    """Read the raw trace of one sample as (times, values) arrays.

    Opens ``<base_dir>/<dataset>/<dataset>_<sample>.csv`` and collects the
    two columns that follow the ``Time,Value`` header row, stopping at the
    first row with at most one cell. Any ``Sample Name`` row in the file is
    compared against ``name`` to make sure the expected sample was exported
    to this file.

    Parameters
    ----------
    dataset : str
        Name of the run; used both as sub-directory and as file name prefix.
    sample : str
        File name suffix of the sample, e.g. ``"Sample1"``.
    name : str
        Sample name expected in the ``Sample Name`` row of the file.
    base_dir : str, optional
        Folder containing the dataset sub-directory. Defaults to the
        module-level ``directory``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(times, values)``; both are empty if the file has no
        ``Time,Value`` section.

    Raises
    ------
    ValueError
        If a ``Sample Name`` row does not carry the expected ``name``.
    """
    if base_dir is None:
        base_dir = directory
    filename = "%s_%s.csv" % (dataset, sample)
    path = os.path.join(base_dir, dataset, filename)
    print("Reading", path)
    times = []
    values = []
    data_found = False
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, encoding='latin1', newline='') as stream:
        for row in csv.reader(stream):
            if row == ['Time', 'Value']:
                data_found = True
            elif row and row[0] == "Sample Name":
                # Explicit check instead of assert, so that it also runs
                # under "python -O".
                if len(row) < 2 or row[1] != name:
                    raise ValueError("Expected sample name %r in %s, found %r"
                                     % (name, path, row[1:]))
            elif not data_found:
                # Header lines (including blank ones) before the data block.
                continue
            elif len(row) <= 1:
                # A blank line or a single-cell row ends the data block.
                break
            else:
                times.append(float(row[0]))
                values.append(float(row[1]))
    return array(times), array(values)


# Runs to plot; each name is both the sub-directory and the file name prefix
# of the exported CSV files (see read_ladder / read_data).
datasets = (
    # [0] and [1]: drawn in blue under the "Before size selection" label
    "2100 expert_High Sensitivity DNA Assay_DE72901295_2016-03-30_14-27-12",
    "2100 expert_High Sensitivity DNA Assay_DE72901295_2016-03-30_15-14-11",
    # [2]: drawn in purple under the "After size selection" label
    "2100 expert_High Sensitivity DNA Assay_DE20901421_2016-04-18_17-19-17",
)

# Figure 1: every trace overlaid in one plot with a logarithmic size axis.
fig = figure()

nine_lanes = ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8", "Sample9")
# One entry per run:
# (dataset, sample files, expected sample names, line colour, legend label).
# NOTE(review): "9_1/7 dulution" is checked by read_data() against the sample
# name stored in the file, so the spelling presumably matches the export.
overview_runs = (
    (datasets[0],
     nine_lanes,
     ("1_1/11 dilution", "2_1/8 dilution", "3_1/8 dilution", "4_1/7 dilution", "5_1/8 dilution", "6_no dilution", "7_1/6 dilution", "8_1/7 dilution", "9_1/7 dulution"),
     'blue',
     "Before size\nselection"),
    (datasets[1],
     nine_lanes,
     ("10_1/8 dilution", "11_1/7 dilution", "12_1/8 dilution", "13_1/9 dilution", "14_1/7 dilution", "15_1/6 dilution", "16_1/6 dilution", "17_1/7 dilution", "18_1/7 dilution"),
     'blue',
     None),
    (datasets[2],
     ("Sample4", "Sample5"),
     ("Caspar", "Caspar"),
     'purple',
     "After size\nselection"),
)

for dataset, samples, names, color, label in overview_runs:
    # Each run is converted from time to size with its own ladder.
    interpolator = read_ladder(dataset)
    xmin = interpolator.x.min()
    xmax = interpolator.x.max()
    for sample, name in zip(samples, names):
        times, values = read_data(dataset, sample, name)
        # Keep the part of the trace between the first and last ladder peak
        # (plus one sample past the last peak).
        first = times.searchsorted(xmin)
        last = times.searchsorted(xmax) + 1
        sizes = interpolator(times[first:last])
        semilogx(sizes, values[first:last], color=color, alpha=0.5, label=label)
        # Only the first trace of a run carries the legend label.
        label = None

# Dashed vertical lines at 200 bp and 390 bp, spanning the current y range.
ymin, ymax = ylim()
plot([200, 200], [ymin, ymax], 'r--')
plot([390, 390], [ymin, ymax], 'r--', label='Selected\nsize limits')

sizes = (50, 100, 200, 300, 400, 600, 1000, 2000, 4000, 7000)
labels = list(map(str, sizes))
xticks(sizes, labels, fontsize=8)
yticks(fontsize=8)
xlabel("Size [bp]")
ylabel("Fluorescence units")
legend()
# Restore the limits that held before the dashed lines were drawn.
xlim(sizes[0], sizes[-1])
ylim(ymin, ymax)

for filename in ("figure_bioanalyzer_hiseq.svg", "figure_bioanalyzer_hiseq.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)

# Figure 2: the same traces on a linear size axis, zoomed to 180-420 bp.
fig = figure()

nine_lanes = ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8", "Sample9")
# One entry per run:
# (dataset, sample files, expected sample names, line colour, legend label).
zoomed_runs = (
    (datasets[0],
     nine_lanes,
     ("1_1/11 dilution", "2_1/8 dilution", "3_1/8 dilution", "4_1/7 dilution", "5_1/8 dilution", "6_no dilution", "7_1/6 dilution", "8_1/7 dilution", "9_1/7 dulution"),
     'blue',
     "Before size selection"),
    (datasets[1],
     nine_lanes,
     ("10_1/8 dilution", "11_1/7 dilution", "12_1/8 dilution", "13_1/9 dilution", "14_1/7 dilution", "15_1/6 dilution", "16_1/6 dilution", "17_1/7 dilution", "18_1/7 dilution"),
     'blue',
     None),
    (datasets[2],
     ("Sample4", "Sample5"),
     ("Caspar", "Caspar"),
     'purple',
     "After size selection"),
)

for dataset, samples, names, color, label in zoomed_runs:
    # Each run is converted from time to size with its own ladder.
    interpolator = read_ladder(dataset)
    xmin = interpolator.x.min()
    xmax = interpolator.x.max()
    for sample, name in zip(samples, names):
        times, values = read_data(dataset, sample, name)
        # Keep the part of the trace between the first and last ladder peak
        # (plus one sample past the last peak).
        first = times.searchsorted(xmin)
        last = times.searchsorted(xmax) + 1
        sizes = interpolator(times[first:last])
        plot(sizes, values[first:last], color=color, alpha=0.5, label=label)
        # Only the first trace of a run carries the legend label.
        label = None

sizes = (200, 250, 300, 350, 400)
labels = list(map(str, sizes))
xticks(sizes, labels, fontsize=8)
yticks(fontsize=8)

# Keep the automatic lower y limit but cap the upper one at 250.
ymin = ylim()[0]
ymax = 250
plot([200, 200], [ymin, ymax], 'r--')
plot([390, 390], [ymin, ymax], 'r--', label='Selected size limits')
xlabel("Size [bp]")
ylabel("Fluorescence units")

xlim(180, 420)
ylim(ymin, ymax)
legend(bbox_to_anchor=(0.1, 0.98))

for filename in ("figure_bioanalyzer_hiseq_zoomed.svg", "figure_bioanalyzer_hiseq_zoomed.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)


# Figure 3: one panel per sample (6 rows x 3 columns), logarithmic size axis.
fig = figure(figsize=(6,12))

# Invisible axes spanning the whole figure; they only carry the axis labels
# shared by all panels.
ax = fig.add_subplot(111)
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
ax.set_xlabel("Size [bp]")
ax.set_ylabel("Fluorescence units", labelpad=27)

# Row labels: panel row j is labelled timepoints[j].
timepoints = ("0 hours", "1 hour", "4 hours", "12 hours", "24 hours", "96 hours")

samples = ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8", "Sample9")
# (dataset, expected sample names) for the two runs, nine panels each.
separate_runs = (
    (datasets[0],
     ("1_1/11 dilution", "2_1/8 dilution", "3_1/8 dilution", "4_1/7 dilution", "5_1/8 dilution", "6_no dilution", "7_1/6 dilution", "8_1/7 dilution", "9_1/7 dulution")),
    (datasets[1],
     ("10_1/8 dilution", "11_1/7 dilution", "12_1/8 dilution", "13_1/9 dilution", "14_1/7 dilution", "15_1/6 dilution", "16_1/6 dilution", "17_1/7 dilution", "18_1/7 dilution")),
)

# Tick positions shown on the bottom row. Every panel uses their range as its
# x limits, so the unlabelled rows line up with the labelled bottom row.
# Previously the tick tuple reused the name `sizes`, which gave the bottom
# row xlim(50, 7000) while all other panels used their own data range.
tick_sizes = (50, 100, 300, 2000, 7000)
tick_labels = [str(size) for size in tick_sizes]

i = 0
for dataset, names in separate_runs:
    # Each run is converted from time to size with its own ladder.
    interpolator = read_ladder(dataset)
    xmin = interpolator.x.min()
    xmax = interpolator.x.max()
    for sample, name in zip(samples, names):
        times, values = read_data(dataset, sample, name)
        # Keep the part of the trace between the first and last ladder peak
        # (plus one sample past the last peak).
        start = times.searchsorted(xmin)
        end = times.searchsorted(xmax) + 1
        times = times[start:end]
        values = values[start:end]
        sizes = interpolator(times)
        ax = fig.add_subplot(6,3,i+1)
        semilogx(sizes, values, color='blue', alpha=0.5)
        yticks(fontsize=8)
        # Remember the automatic y range so the dashed lines do not widen it.
        ymin, ymax = ylim()
        plot([200, 200], [ymin, ymax], 'r--')
        plot([390, 390], [ymin, ymax], 'r--')
        if i >= 15:
            # Only the bottom row shows x tick labels.
            xticks(tick_sizes, tick_labels, fontsize=8)
        else:
            xticks([])
        xlim(tick_sizes[0], tick_sizes[-1])
        ylim(ymin, ymax)
        if i % 3 == 0:
            # First panel of a row: label the row with its time point.
            ylabel(timepoints[i // 3], fontsize=8)
        if i < 3:
            title("Replicate %d" % (i+1), fontsize=8)
        if i == 5:
            title("Sample negative control", fontsize=8, pad=2)
        i += 1

subplots_adjust(bottom=0.08, top=0.97, left=0.14, right=0.97, wspace=0.3)

for filename in ("figure_bioanalyzer_hiseq_separate.svg", "figure_bioanalyzer_hiseq_separate.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)


# Figure 4: one panel per sample (6 rows x 3 columns), linear size axis
# zoomed to 180-420 bp.
fig = figure(figsize=(6,12))

# Invisible axes spanning the whole figure; they only carry the axis labels
# shared by all panels.
ax = fig.add_subplot(111)
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
ax.set_xlabel("Size [bp]")
ax.set_ylabel("Fluorescence units", labelpad=27)

# Row labels: panel row j is labelled timepoints[j].
timepoints = ("0 hours", "1 hour", "4 hours", "12 hours", "24 hours", "96 hours")

samples = ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8", "Sample9")
# (dataset, expected sample names) for the two runs, nine panels each.
separate_zoomed_runs = (
    (datasets[0],
     ("1_1/11 dilution", "2_1/8 dilution", "3_1/8 dilution", "4_1/7 dilution", "5_1/8 dilution", "6_no dilution", "7_1/6 dilution", "8_1/7 dilution", "9_1/7 dulution")),
    (datasets[1],
     ("10_1/8 dilution", "11_1/7 dilution", "12_1/8 dilution", "13_1/9 dilution", "14_1/7 dilution", "15_1/6 dilution", "16_1/6 dilution", "17_1/7 dilution", "18_1/7 dilution")),
)

i = 0
for dataset, names in separate_zoomed_runs:
    # Each run is converted from time to size with its own ladder.
    interpolator = read_ladder(dataset)
    xmin = interpolator.x.min()
    xmax = interpolator.x.max()
    for sample, name in zip(samples, names):
        times, values = read_data(dataset, sample, name)
        # Keep the part of the trace between the first and last ladder peak
        # (plus one sample past the last peak).
        first = times.searchsorted(xmin)
        last = times.searchsorted(xmax) + 1
        values = values[first:last]
        sizes = interpolator(times[first:last])
        ax = fig.add_subplot(6,3,i+1)
        # Plot only the 180-420 bp window; the y range is derived from it.
        start = searchsorted(sizes, 180)
        end = searchsorted(sizes, 420)
        window = values[start:end]
        plot(sizes[start:end], window, color='blue', alpha=0.5)
        yticks(fontsize=8)
        # 10% headroom above the highest point; below, at least ymax/20 of
        # space under zero.
        ymax = 1.1 * max(window)
        ymin = min(-ymax / 20.0, 1.1 * min(window))
        plot([200, 200], [ymin, ymax], 'r--')
        plot([390, 390], [ymin, ymax], 'r--')
        xlim(180, 420)
        ylim(ymin, ymax)
        ax.xaxis.set_minor_formatter(NullFormatter())
        if i >= 15:
            # Only the bottom row shows x tick labels.
            sizes = (200, 300, 400)
            labels = [str(size) for size in sizes]
            xticks(sizes, labels, fontsize=8)
        else:
            ax.xaxis.set_major_formatter(NullFormatter())
        if i % 3 == 0:
            # First panel of a row: label the row with its time point.
            ylabel(timepoints[i // 3], fontsize=8)
        if i < 3:
            title("Replicate %d" % (i+1), fontsize=8)
        if i == 5:
            title("Sample negative control", fontsize=8, pad=2)
        i += 1

subplots_adjust(bottom=0.08, top=0.97, left=0.14, right=0.97, wspace=0.3)

for filename in ("figure_bioanalyzer_hiseq_separate_zoomed.svg", "figure_bioanalyzer_hiseq_separate_zoomed.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)
